12 organoid lines were screened with about 70 compounds (5 concentrations) of the clinical cancer panel. After 7 days total (4 days of treatment) the organoids we lysed and a ctg assay was performed. The experiment was conducted in 2 replciates.
#library(tidyverse)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(corrr)
library(pheatmap)
library(gridExtra)
library(EBImage) #cite
library(HTSvis) #cite
library(PharmacoGx) #cite
library(growthcurve) #cite
library(jsonlite)
library(httr)
To systematically link compounds to a molecular target the DGIDB database is used. Library compounds are linked to affected genes. The result table is manually curated and subsequently loaded.
return_dgidb <- function(input, type = "genes"){
url = "http://dgidb.genome.wustl.edu"
path = paste0("/api/v1/interactions.json",
"?", type,"=",input %>% str_replace_all(., " ", ""))
response <- GET(url = url, path = path)
response$content %>%
rawToChar() %>%
fromJSON() -> temp
do.call(what = "rbind",
args = lapply(temp, as.data.frame)) -> temp
if(nrow(temp) > 0 & !("suggestions" %in% colnames(temp))){
as.data.frame(temp$interactions) %>%
as_tibble() %>%
select(contains("Name"), source, interactionType, contains("Id")) %>%
mutate(input = input,
alt_input = NA,
input_type = type) %>%
return()
} else if(("suggestions" %in% colnames(temp)) & (temp$suggestions %>% unlist %>% is.null() != TRUE)){
input.alt <- temp$suggestions %>% unlist %>% .[1]
cat(paste0(c("corrected ", input, "to", input.alt, "\n")))
path.alt = paste0("/api/v1/interactions.json",
"?", type,"=",input.alt %>% str_replace_all(., " ", ""))
GET(url = url, path = path.alt) %>%
.$content%>%
rawToChar() %>%
fromJSON() -> temp.alt
do.call(what = "rbind",
args = lapply(temp.alt, as.data.frame)) -> temp.alt
if(nrow(temp.alt) > 0 & !("suggestions" %in% colnames(temp.alt))){
as.data.frame(temp.alt$interactions) %>%
as_tibble() %>%
select(contains("Name"), source, interactionType, contains("Id")) %>%
mutate(input = input,
alt_input = input.alt,
input_type = type) %>%
return()
}
} else
cat(paste0(c("could not find ", input, "\n")))
}
drug_targets.list <- lapply(auc_ccp$drug %>% unique, return_dgidb, type = "drugs")
drug_targets <- do.call(what = "rbind",
args = lapply(drug_targets.list, as.data.frame)) %>%
#ugly way to fix the problem of changing names (geneName or drugName depending on the input type)
group_by_(colnames(.)[1], "source", "input") %>%
summarize(count = n()) %>%
group_by_(colnames(.)[1], "input") %>%
summarize(count = n()) %>%
filter(count > 0) %>%
group_by(input) %>%
summarise(target = geneName[count %>% which.max()],
count = max(count)) %>%
arrange(target)
#if(is.na() == FALSE){cut(x, c(0, 18.5, 24.9, 29.9, 34.9, 39.9, Inf), labels = FALSE)} else NA
#run rsync -ravP rindtorf@b110-sc2hn01:/collab-ag-fischer/PROMISE/ctg_data /Users/rindtorf/promise/rsync_data/
#define datasets to load ctg data into R
assay <- "ctg_data/HC1092"
#path <- '/Volumes/collab-bernd-fischer/PROMISE/'
path <- '/Volumes/Macintosh HD/Users/rindtorf/promise/rsync_data/'
pattern <- '.TXT'
filelist <- list.files(paste0(path, assay, "/"), pattern = pattern)
dir <- paste0(path, assay, "/")
#define a function to load ctg files into R once they match the given definitions
load_delim <- function(path, name){
read_delim(paste0(path, name),
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
}
#load ctg data into R
tmp_list <- lapply(filelist, load_delim, path = dir)
ctg <- do.call('rbind', tmp_list)
colnames(ctg) <- c('complete_barcode', 'Well_ID_384', 'photons')
#mutate ctg data to match the annotation file afterwards
ctg <- ctg %>%
mutate(complete_barcode.mut = complete_barcode) %>%
separate(complete_barcode.mut, c("date", "user", "mithras", "plate_barcode")) %>%
mutate(plate_barcode.mut = plate_barcode) %>%
separate(plate_barcode.mut, c("line", "plate", "library"), sep = c(7,11))
#load tidy dataset with gel and organoid annotation
#load the correct but ill-formatted annotation file
ccp_lib <- read_delim(paste0(path, "layouts/raw_layout_files/Clin_Cancer_Panel_V170511.csv"),
";", escape_double = FALSE, trim_ws = TRUE)
colnames(ccp_lib)[c(1, 3, 6)] <- c("Product Name", "concentration_factor", "Well_ID_384")
ccp_lib <- ccp_lib %>%
select(`Product Name`, Well_ID_384, concentration_factor)# %>%
# mutate(concentration_factor = replace(concentration_factor, ccp_lib$`Product Name` == "DMSO" | ccp_lib$`Product Name` == "Staurosporine_500nM", NA))
#the list of used compounds was manually changed
drug_targets <- read_delim("~/promise/local_data/annotation/drug_targets.csv",
";", escape_double = FALSE, trim_ws = TRUE)
colnames(ccp_lib)[1] <- "drug"
#Merge the annotation data of the ccp panel
ctg_ccp <- merge(ctg, ccp_lib, by = 'Well_ID_384') %>%
merge(., drug_targets, by.x = "drug", by.y = "input") %>%
select(-drug) %>%
mutate(drug = input_renamed) %>%
select(-input_renamed) %>%
mutate(control = if_else(is.na(concentration_factor), 1, 0),
drug_conc = paste0(drug, concentration_factor)) %>%
#replicate = if_else(date %in% c("170516", "170620", "170718", "170704" ), 2, 1)
mutate(Well_ID_384.mut = Well_ID_384) %>%
separate(Well_ID_384.mut, c("letter", "number"), sep = 1) %>%
mutate(number = as.numeric(number))
#Add a replicate count in a clumsy way
ctg_ccp <- ctg_ccp %>%
select(plate, line) %>%
group_by(line, plate) %>%
sample_n(1) %>%
mutate(plate.no = substr(plate, 2,4) %>% as.numeric) %>%
group_by(line, plate) %>%
summarise(replicate = if_else(plate.no %in% c(6, 12, 4 ), 1,2)) %>%
merge(ctg_ccp, ., by = c("line", "plate"))
#set wells with mistakes to NA and add a dummy variable for handling in HTSVis
ctg_ccp <- ctg_ccp %>%
mutate(photons = ifelse(plate_barcode == 'D004T01P006L08' & letter %in% LETTERS[c(1, 3, 5, 7, 9, 11, 13, 15)] & number %in% c(1:13), NA, photons)) %>%
mutate(dummy = 1)
#mutate(dat, dist = ifelse(speed == 4, dist * 100, dist)
tmp <- ctg_ccp #%>%
#filter(date != "170606")
save(tmp, file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/ctg_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
#write results file
ctg_ccp <- ctg_ccp # %>%
#filter(line != "D022T01")
load(file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/ctg_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
ctg_ccp <- tmp
The primary location of tumors is divided into three sights.
#import clinical data
cohort <- read_excel("~/promise/local_data/clinical_data/Clin_Data_Basic-cohort.xlsx") %>%
separate(ID, c("p", "no.line"), sep = 1) %>%
mutate(line = paste0("D", no.line, "T01")) %>% #only works if there were no re-biopsies
select(-p, -no.line)
cohort_short <- cohort %>%
select(line, Location) %>%
#select(line, age, Location, `T`, N, M, Stage, G) %>%
mutate(Patient = line) %>%
select(-line)
cohort_short$Location[cohort_short$Location == "Asc"] <- "Right"
cohort_short$Location[cohort_short$Location == "Sigm"] <- "Left"
cohort_short <- cohort_short %>%
mutate(Location = factor(Location, levels = c("Right", "Left", "Rectum")))
levels(cohort_short$Location)
## [1] "Right" "Left" "Rectum"
# Get lower triangle of the correlation matrix
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
ctg_ccp %>%
dplyr::group_by(plate_barcode) %>%
mutate(median = median(photons, na.rm = TRUE)) %>%
mutate(treat.median = photons/median) %>%
#filter(date != "170606") %>%
select(plate_barcode, Well_ID_384, treat.median) %>%
spread(plate_barcode, treat.median) %>%
remove_rownames() %>%
column_to_rownames('Well_ID_384') %>%
cor(use = "pairwise.complete.obs") %>%
get_lower_tri() %>%
melt() %>%
ggplot(aes(x= Var1, y = Var2, fill = value)) +
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
## Warning: Setting row names on a tibble is deprecated.
The 32 DMSO controls yield a reference median which is, across all plates, more robust and greater in relative size than a plate-wise median. Hence the DMSO control dependent median should be used for calibration of plates.
ctrl_tmp <- ctg_ccp %>%
mutate(control = ifelse(drug == "DMSO", "DMSO", "Treatment")) %>%
#filter(plate_barcode == "D020T01P004L08") %>%
filter(control == "DMSO") %>%
group_by(plate_barcode) %>%
summarise(ctrl.median = median(photons, na.rm = TRUE),
ctrl.mad = mad(photons, na.rm = TRUE)) %>%
arrange(plate_barcode)
noctrl_tmp <- ctg_ccp %>%
mutate(control = ifelse(drug == "DMSO", "DMSO", "Treatment")) %>%
#filter(plate_barcode == "D020T01P004L08") %>%
#filter(control == "DMSO") %>%
group_by(plate_barcode) %>%
summarise(median = median(photons, na.rm = TRUE),
mad = mad(photons, na.rm = TRUE)) %>%
arrange(plate_barcode)
#merge both tables to compare the different median references
ctrls <- merge(ctrl_tmp, noctrl_tmp, by = "plate_barcode")
#illustrate to what extent the overall plate median is shifted towards more negative values relative to the DMSO based plate median
ctrls %>%
mutate(d.median = (median - ctrl.median)/ctrl.median*100,
d.mad = (mad - ctrl.mad)/ctrl.mad*100,
ctrl.r.cov = ctrl.mad/ctrl.median,
r.cov = mad/median) %>%
ggplot(aes(d.median)) +
geom_density(fill = "blue", alpha = 0.7) +
xlab("% difference of overall and DMSO plate median across screen") +
theme_minimal()
#add the ctrl median as a variable to the ctg_ccp df
ctg_ccp <- merge(ctg_ccp, ctrls, by=c("plate_barcode")) %>%
#introduce relative photon counts
mutate(treat.ctrl.median = photons/ctrl.median,
treat.median = photons/median)
tmp <- ctg_ccp %>%
unite("line_well", c("line", "Well_ID_384")) %>%
select(line_well, replicate, treat.ctrl.median) %>%
spread(replicate, treat.ctrl.median)
tmp.cor<- tmp %>%
select(`1`,`2`) %>%
cor(use = "pairwise.complete.obs") %>%
.[1,2] %>%
round(2)
tmp %>%
ggplot(aes(x = `1`, y = `2`)) +
geom_point(alpha = 0.2) +
theme_minimal() +
ylab("Replicate 2") +
xlab("Replicate 1") +
ggtitle(paste0("Correlation of Normalized Treatment Reponse, r =", tmp.cor)) +
geom_abline(slope = 1) +
geom_density2d(alpha = 0.7)
## Warning: Removed 104 rows containing non-finite values (stat_density2d).
## Warning: Removed 104 rows containing missing values (geom_point).
#stat_binhex()
Load CMS subtype data and plot a heatmap with response profiles. The CMS subtype confindence has to be greater than 50% to be shown. The first heatmap shows how some response profiles cluster better by the date of their measurment than their organoid line. The second cluster shows the response profiles in relationship to relevant clinical data.
# import expression data
library(readxl)
subtypes_organoids <- read_excel("~/promise/local_data/expression/subtypes_organoids_28062017.xlsx") %>%
separate(X__1, c("p.line", "microarray"), sep = "_.", extra = "drop") %>%
separate(p.line, c("p", "no.line"), sep = 1) %>%
mutate(line = paste0("D", no.line, "01")) %>%
select(-p, -no.line)
#The _short table contains only one variable describing the CMS.
subtypes_short <- subtypes_organoids %>%
mutate(Patient = line,
# CMS = RF.nearestCMS,
CMS = RF.predictedCMS) %>%
select(Patient, CMS)
# Test if posterior probs give more information about the tested lines
# Generate annotations for columns
col = data.frame(
Patient = factor(rep(sort(unique(ctg_ccp$line)), each = 2))
)
col <- merge(col, subtypes_short, by = "Patient", all.x = TRUE)
col <- merge(col, cohort_short, by = "Patient", all.x = TRUE)
col <- col %>%
mutate(Patient = substr(Patient, 1,5)) %>%
select(Patient, Location, CMS)
col <- ctg_ccp %>%
select(plate_barcode, date, mithras, user) %>%
group_by(plate_barcode) %>%
sample_n(1) %>%
ungroup() %>%
select(-plate_barcode) %>%
cbind(col, .)
rownames(col) = unique(sort(ctg_ccp$plate_barcode))
col_temp <- col %>%
select(Patient, date)
#Generate annotation for rows
row <- ctg_ccp %>%
group_by(Well_ID_384) %>%
sample_n(1) %>%
#mutate(concentration_factor <- as.character(concen))
select(drug, concentration_factor, Well_ID_384) %>%
remove_rownames() %>%
as.data.frame() %>%
column_to_rownames("Well_ID_384")
ctg_ccp %>%
group_by(plate_barcode) %>%
mutate(median = median(photons, na.rm = TRUE)) %>%
mutate(treat.median = photons/median) %>%
#filter(date != "170606") %>%
select(plate_barcode, Well_ID_384, treat.median) %>%
spread(plate_barcode, treat.median) %>%
remove_rownames() %>%
column_to_rownames('Well_ID_384') %>%
pheatmap(
scale = "column",
cluster_rows = TRUE,
show_rownames = FALSE,
show_colnames = FALSE,
color = colorRampPalette(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'))(50),
#color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
annotation_col = col_temp#,
#annotation_row = row
)
## Warning: Setting row names on a tibble is deprecated.
col_temp <- col %>%
select(Patient, Location, CMS)
ctg_ccp %>%
group_by(plate_barcode) %>%
mutate(median = median(photons, na.rm = TRUE)) %>%
mutate(treat.median = photons/median) %>%
#filter(date != "170606") %>%
select(plate_barcode, Well_ID_384, treat.median) %>%
spread(plate_barcode, treat.median) %>%
remove_rownames() %>%
column_to_rownames('Well_ID_384') %>%
pheatmap(
scale = "column",
cluster_rows = TRUE,
show_rownames = FALSE,
show_colnames = FALSE,
color = colorRampPalette(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'))(50),
#color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
annotation_col = col_temp#,
#annotation_row = row
)
## Warning: Setting row names on a tibble is deprecated.
Different response profiles are visible
dodge <- position_dodge(width=0.08)
ctg_ccp %>%
mutate(line = as.factor(line)) %>%
group_by(line, drug, concentration_factor) %>%
dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
range_low_photons = range(treat.median)[1],
range_high_photons = range(treat.median)[2]) %>%
ggplot(aes(y = mean_photons, x = concentration_factor)) +
geom_point(position = dodge, stat = "identity", aes( colour = line)) +
geom_path(aes( colour = line), alpha = 1, position = dodge, size = 1) +
geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
scale_x_log10(breaks = c(7,14)) +
ylab("photon count to plate median") +
facet_wrap(~drug) +
xlab("concentration factor 5-fold") +
scale_colour_brewer(palette = "Set3") +
theme_minimal()
## Warning: Removed 93 rows containing missing values (geom_errorbar).
tmp <- ctg_ccp %>%
#filter(line == "D007T01") %>%
group_by(plate_barcode, drug) %>%
#select(plate_barcode, drug, concentration_factor, treat.ctrl.median) %>%
summarise(auc=PharmacoGx::computeAUC(concentration_factor, treat.ctrl.median, trunc = TRUE, area.type = "Fitted", verbose = TRUE, viability_as_pct = FALSE))
auc_ccp <- tmp %>%
#filter(drug == 'Nutlin3a') %>%
separate(plate_barcode, c("line", "plate", "library"), sep = c(7,11), remove = FALSE)
#store the auc data for efficient knitting
save(auc_ccp, file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/auc_ccp_unfiltered_", substr(Sys.time(),1,10), ".Rdata"))
#other means of AUC calculation can be considered as well
#the potent inhibitor Bortezomib leads to a viability of almost 0, scaling to a positive control is currently avoided
ctg_ccp%>%filter(drug == "Bortezomib" & concentration_factor >= 0.2) %>% select(treat.ctrl.median) %>% .$treat.ctrl.median %>% median(na.rm = TRUE)
## [1] 0.006639004
#The name of the auc file is fixed since recalculating it during every run is costly
load(file = paste0("/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/auc_ccp_unfiltered_2017-08-21.Rdata"))
#I don't know why this is necessary
auc_ccp <- auc_cpp
tmp <- auc_ccp %>%
filter(drug == 'VX-702') %>%
group_by(line, drug) %>%
summarise(auc.mean = mean(auc, na.rm = TRUE),
auc.min = min(auc),
auc.max = max(auc))
tmp.thres <- auc_ccp %>%
filter(drug == 'VX-702') %>%
group_by(drug) %>%
summarise(drug.thres = median(auc, na.rm = TRUE) + 2*mad(auc, na.rm = TRUE))# %>%
#merge(tmp, ., by = "drug")
#tmp.mad <- mad(tmp$auc, na.rm = TRUE)
#tmp.median <- median(tmp$auc, na.rm = TRUE)
dot <- tmp %>%
ggplot(aes(y = line, x = auc.mean, color = line)) +
geom_point() +
theme_minimal() +
scale_colour_brewer(palette = "Set3") +
geom_vline(data = tmp.thres, aes(xintercept = drug.thres)) +
theme(axis.text.y=element_blank()) +
facet_wrap(~drug)
dot
#density <- tmp %>%
# ggplot(aes(auc)) +
# geom_density() +
# geom_vline(xintercept = tmp.median + 2*tmp.mad) +
# theme_minimal()
#grid.arrange(dot, density, ncol=1, nrow =2, heights=c(4, 1.4)) %>%
# facet_wrap(~ line)
auc_ccp_scaled <- auc_ccp %>%
as_tibble() %>%
group_by(drug) %>%
summarise(drug.median = median(auc, na.rm = TRUE),
drug.mad = mad(auc, na.rm = TRUE)) %>%
merge(auc_ccp, ., by = "drug") %>%
mutate(rz.auc = (auc - drug.median)/drug.mad,
dif.auc = (auc - drug.median),
ratio.auc = (auc/drug.median),
ratio.auc.log = FitAR::glog(ratio.auc)- FitAR::glog(1),
auc.log = FitAR::glog(auc)) %>%
arrange(rz.auc)
row <- ctg_ccp %>%
group_by(drug) %>%
summarise(Target = sample(target, 1),
Pathway = sample(broad_summary, 1)) %>%
as.data.frame() %>%
remove_rownames() %>%
column_to_rownames('drug') %>%
dplyr::select(Pathway)
col <- auc_ccp %>%
ungroup() %>%
dplyr::select(plate_barcode, line) %>%
distinct(plate_barcode, .keep_all = TRUE) %>%
as.data.frame() %>%
remove_rownames() %>%
column_to_rownames('plate_barcode')
auc_ccp %>%
ungroup() %>%
dplyr::select(drug, plate_barcode, auc) %>%
spread(plate_barcode, auc) %>%
remove_rownames() %>%
column_to_rownames('drug') %>%
pheatmap(
#scale = "column",
cluster_rows = TRUE,
show_rownames = TRUE,
show_colnames = TRUE,
color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
#color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
#color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
annotation_col = col,
annotation_row = row,
cutree_rows = 5
)
## Warning: Setting row names on a tibble is deprecated.
Todo: Associate AUC with organoid genotypes etc.
dodge <- position_dodge(width=0.15)
ctg_ccp %>%
#filter(drug %in% c( "Methotrexate", "Birinapant", "Ponatinib", "Nutlin3a",
# "Panobinostat", "Erlotinib")) %>%
filter(drug %in% c( "5-FU", "Irinotecan / SN-38",
"Gefitinib", "Erlotinib")) %>%
mutate(line = as.factor(line),
drug = factor(drug, levels = c("Irinotecan / SN-38", "5-FU",
"Gefitinib", "Erlotinib"))) %>%
group_by(line, drug, concentration_factor) %>%
dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
range_low_photons = range(treat.median)[1],
range_high_photons = range(treat.median)[2]) %>%
ggplot(aes(y = mean_photons, x = concentration_factor)) +
geom_point(position = dodge, stat = "identity", aes( colour = line), size = 2) +
geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 2) +
#geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
#geom_ribbon( aes(x=concentration_factor, y=mean_photons, ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons), alpha=0.2) +
scale_x_log10(breaks = c(7,14)) +
ylab("Photon count normalized to control") +
facet_wrap(~drug) +
xlab("Concentration factor 5-fold") +
scale_color_manual(values=c(rep("#9E9A99", times = 10), "#F0390B", rep("#9E9A99", times = 1))) +
theme_minimal()
To compare a lines AUC with the median AUC of the same treatment a robust z score is calculated. To reduce the amount of false-positives a regression to the mean is guiding the analysis: The replicate with the smallest distance to the group median is defined to be relevant for further analysis. The stronger deviating replicate is discarded.
tmp <- auc_ccp %>%
#filter(drug == 'DMSO') %>%
group_by(line, drug) %>%
summarise(auc.mean = mean(auc, na.rm = TRUE),
auc.min = min(auc),
auc.max = max(auc))
tmp.rz <- auc_ccp %>%
as_tibble() %>%
group_by(drug) %>%
summarise(drug.thres = median(auc, na.rm = TRUE) + 2*mad(auc, na.rm = TRUE),
drug.median = median(auc, na.rm = TRUE),
drug.mad = mad(auc, na.rm = TRUE)) %>%
merge(tmp, ., by = "drug") %>%
#filter(drug == "Etoposid") %>%
mutate(rz.auc.mean = (auc.mean - drug.median)/drug.mad,
rz.auc.min = (auc.min - drug.median)/drug.mad,
rz.auc.max = (auc.max - drug.median)/drug.mad,
dif.auc.mean = (auc.mean - drug.median),
dif.auc.min = (auc.min - drug.median),
dif.auc.max = (auc.max - drug.median)) %>%
arrange(rz.auc.max)
p.rz <- tmp.rz %>%
filter(!grepl(.$drug, pattern = 'mit')) %>%
#filter(line == 'D027T01') %>%
#group_by(line) %>%
mutate(rz.auc = if_else(rz.auc.min<rz.auc.mean, rz.auc.min, rz.auc.max),
dif.auc = if_else(dif.auc.min<dif.auc.mean, dif.auc.min, dif.auc.max)) %>%
filter(drug.mad != 0) %>%
mutate(responder = if_else(rz.auc > 2, TRUE, FALSE))
comp.tmp <- p.rz %>%
filter(rz.auc > 2, dif.auc > 0.3) %>%
select(drug) %>%
.$drug %>%
unique()
dodge <- position_dodge(width=0.15)
ctg_ccp %>%
filter(drug %in% c( "Oxaliplatin mit 5-FU", comp.tmp)) %>%
#filter(drug %in% c( "Irinotecan / SN-38", "5-FU",
# "Gefitinib", "Erlotinib")) %>%
mutate(line = as.factor(line),
drug = factor(drug, levels = c( "Oxaliplatin mit 5-FU", comp.tmp))) %>%
group_by(line, drug, concentration_factor) %>%
dplyr::summarise(mean_photons = mean(treat.ctrl.median, na.rm = TRUE),
sd_photons = sd(treat.ctrl.median, na.rm = TRUE),
range_low_photons = range(treat.median)[1],
range_high_photons = range(treat.median)[2]) %>%
ggplot(aes(y = mean_photons, x = concentration_factor)) +
geom_point(position = dodge, stat = "identity", aes( colour = line), size = 2) +
geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 2) +
#geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
#geom_ribbon( aes(x=concentration_factor, y=mean_photons, ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons), alpha=0.2) +
scale_x_log10(breaks = c(7,14)) +
ylab("Photon count normalized to control") +
facet_wrap(~drug) +
xlab("Concentration factor 5-fold, mean of n=2") +
scale_colour_brewer(palette = "Set3") +
theme_minimal()
tmp <- p.rz %>%
filter(!grepl(.$drug, pattern = 'mit')) %>%
filter(line == 'D027T01') %>%
#group_by(line) %>%
mutate(rz.auc = if_else(rz.auc.min<rz.auc.mean, rz.auc.min, rz.auc.max),
dif.auc = if_else(dif.auc.min<dif.auc.mean, dif.auc.min, dif.auc.max)) %>%
filter(drug.mad != 0) %>%
mutate(responder = if_else(rz.auc > 2, TRUE, FALSE))
p.order <- arrange(tmp,desc( rz.auc))$drug
library(ggrepel)
set.seed(42)
#find a way to fix the dif.auc color range so one can see the effect in-vitro
tmp %>%
arrange(rz.auc) %>%
mutate(drug = factor(drug, levels = p.order)) %>%
ggplot(aes(x = drug, y = rz.auc, fill = dif.auc)) +
geom_bar(stat = 'identity') +
theme_minimal() +
scale_fill_gradient(high="red", low="blue") +
geom_label_repel(
data=subset(tmp, rz.auc > 1.5 | rz.auc < -1.5),
aes(x = drug, y = rz.auc, label = drug),
fontface = 'bold', color = 'white',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
)
p.order <- arrange(tmp,desc( dif.auc))$drug
library(ggrepel)
set.seed(42)
#find a way to fix the dif.auc color range so one can see the effect in-vitro
tmp %>%
arrange(rz.auc) %>%
mutate(drug = factor(drug, levels = p.order)) %>%
ggplot(aes(x = drug, y = dif.auc, fill = rz.auc)) +
geom_bar(stat = 'identity') +
theme_minimal() +
scale_fill_gradient(high="red", low="blue") +
geom_label_repel(
data=subset(tmp, rz.auc > 1.5 | rz.auc < -1.5),
aes(x = drug, y = dif.auc, label = drug),
fontface = 'bold', color = 'white',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
) +
theme(axis.text.x=element_blank()) +
ylab("Effect size compared to median AUC")
drug_targets
## # A tibble: 70 x 7
## input input_renamed target count translation narrow_summary
## <chr> <chr> <chr> <int> <chr> <chr>
## 1 Dasatinib Dasatinib ABL1 11 ABL1 ABL1
## 2 Ponatinib Ponatinib ABL1 5 ABL1 ABL1
## 3 AZD 5363 AZD 5363 AKT1 4 AKT1 PI3K/Akt
## 4 Crizotinib Crizotinib ALK 11 ALK ALK
## 5 ALISERTIB Alisertib AURKA 8 AURKA AURKA
## 6 Oxaliplatin Oxaliplatin BCL2 1 DNA damage DNA damage
## 7 Paclitaxel Paclitaxel BCL2 4 Cytoskeleton Cytoskeleton
## 8 Venetoclax Venetoclax BCL2 2 BCL2 BCL2
## 9 Birinapant Birinapant BIRC2 3 SMAC mimetic proapoptotic
## 10 Dabrafenib Dabrafenib BRAF 11 BRAF BRAF
## # ... with 60 more rows, and 1 more variables: broad_summary <chr>
select_anno <- drug_targets %>%
select(broad_summary) %>%
group_by(broad_summary) %>%
summarise(count = n()) %>%
filter(count > 1)
row <- drug_targets %>%
filter(broad_summary %in% select_anno$broad_summary) %>%
as.data.frame() %>%
column_to_rownames("input") %>%
select(broad_summary) %>%
mutate(broad_summary = factor(broad_summary))
colnames(row) <- c("Target")
# Specify colors
ann_colors <- list(
Patient = c(RColorBrewer::brewer.pal(12, "Set3")),
Location = RColorBrewer::brewer.pal(3, "Set1"),
CMS = RColorBrewer::brewer.pal(3, "Set2")
#Target = c(RColorBrewer::brewer.pal(12, "Paired"), '#c2c2b2', '#ff666a', '#ff0f15'))
)
#names(ann_colors$Target) <- unique(row$Target)
names(ann_colors$Patient) <- unique(col_temp$Patient)
names(ann_colors$Location) <- c("Rectum", "Left", "Right")
names(ann_colors$CMS) <- c("CMS1", "CMS2", "CMS3")
auc_ccp_scaled %>%
group_by(line, drug) %>%
summarise(auc.log = mean(auc.log, na.rm = TRUE)) %>%
select(line, drug, auc.log) %>%
filter(!drug %in% c("Alpelisib", "DMSO", "Vismodegib")) %>%
#filter(!grepl(.$drug, pattern = 'mit')) %>%
spread(line, auc.log) %>%
remove_rownames() %>%
column_to_rownames('drug') %>%
pheatmap(
scale = "row",
cluster_rows = TRUE,
show_rownames = TRUE,
show_colnames = FALSE,
color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
#color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
#color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
#annotation_col = col_temp,
#annotation_row = row,
cutree_rows = 5
)
## Warning: Setting row names on a tibble is deprecated.
auc_ccp_scaled %>%
group_by(plate_barcode) %>%
#mutate(median = median(photons, na.rm = TRUE)) %>%
select(plate_barcode, drug, auc.log) %>%
filter(!drug %in% c("Alpelisib", "DMSO", "Vismodegib")) %>%
#filter(!grepl(.$drug, pattern = 'mit')) %>%
spread(plate_barcode, auc.log) %>%
remove_rownames() %>%
column_to_rownames('drug') %>%
pheatmap(
scale = "row",
cluster_rows = TRUE,
show_rownames = TRUE,
show_colnames = FALSE,
color = colorRampPalette(rev(c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0')))(15),
#color = colorRampPalette(c('#f9f902','#000000','#3701f9'))(10),
#color = c('#ca0020','#f4a582','#f7f7f7','#92c5de','#0571b0'),
annotation_col = col_temp,
#annotation_row = row,
cutree_rows = 5,
annotation_colors = ann_colors
)
## Warning: Setting row names on a tibble is deprecated.
auc_ccp %>% ungroup %>% select(drug) %>%
merge(drug_targets, ., by.x = "input", by.y = "drug", all = TRUE) %>% distinct(input, .keep_all = TRUE) %>%
write.csv(., "~/promise/local_data/annotation/drug_targets_original.csv")
df mit drug, line, auc for each drug test cellline of interest auc ~ drug + cellline.of.interest
Run function to return shortcuts of organoid lines
show_shortcuts <- function(pattern, data, rows = 4){
treatment_of_interest <- as.character(data$drug)[grep(pattern, data$drug)] %>% unique()
defined_m_barcodes <- data %>%
filter(drug == treatment_of_interest) %>%
filter(date != "170606") %>% #currently only this makes sense
separate(plate, c("P", "plate.number") ,1) %>%
mutate(plate.number_m1 = as.numeric(plate.number)-1) %>%
mutate(plate.number_m1 =str_pad(plate.number_m1, 3, pad = 0)) %>%
mutate(plate_barcode_m1 = paste0(line, P, plate.number_m1, library)) %>%
select(drug, plate_barcode_m1, Well_ID_384, line, concentration_factor) %>%
arrange(line, plate_barcode_m1, concentration_factor) %>%
separate(Well_ID_384, c("letter", "number"), 1) %>%
select(plate_barcode_m1, line) %>%
group_by(line) %>%
sample_n(1) %>%
.$plate_barcode_m1
img.path <- data %>%
filter(drug == treatment_of_interest) %>%
filter(date != "170606") %>% #currently only this makes sense
separate(plate, c("P", "plate.number") ,1) %>%
mutate(plate.number_m1 = as.numeric(plate.number)-1) %>%
mutate(plate.number_m1 =str_pad(plate.number_m1, 3, pad = 0)) %>%
mutate(plate_barcode_m1 = paste0(line, P, plate.number_m1, library)) %>%
select(drug, plate_barcode_m1, Well_ID_384, line, concentration_factor) %>%
arrange(line, plate_barcode_m1, concentration_factor) %>%
separate(Well_ID_384, c("letter", "number"), 1) %>%
filter(plate_barcode_m1 %in% defined_m_barcodes) %>%
mutate(path = paste0("~/promise/html/",
plate_barcode_m1,
"/",
plate_barcode_m1,
"_",
letter,
"_",
number,
"_1.jpeg"))
if(nrow(img.path) > 41){img.path <- img.path %>% group_by(plate_barcode_m1) %>% sample_n(1) %>%
arrange(line, plate_barcode_m1)}
img <- readImage(img.path$path,
all = TRUE,
names = paste0(img.path$drug, "_", img.path$line))
img.rows = rows*-1
display(img[20:209, 20:209,,], method = "raster", all = TRUE, nx = rows, margin = 20, bg = "black")
return(img.path %>%
select(drug, line))
}
Print separate response curves for selected compounds
for(i in c("Ponatinib", "Afatinib", "AZD 4547")){
dodge <- position_dodge(width=0.15)
p <- ctg_ccp %>%
group_by(complete_barcode) %>%
mutate(ctrl.median = ifelse(drug == "DMSO", median(photons, na.rm = TRUE), NA),
median = median(photons, na.rm = TRUE),
mad = mad(photons, na.rm = TRUE)) %>%
mutate(treat.ctrl.median = photons/ctrl.median,
treat.median = photons/median,
treat.rob.z = (photons-median)/mad) %>%
#filter(drug == "Erlotinib") %>%
#filter(date != "170606") %>%
filter(drug %in% c(i)) %>%
mutate(line = as.factor(line)) %>%
group_by(line, drug, concentration_factor) %>%
dplyr::summarise(mean_photons = mean(treat.median, na.rm = TRUE),
sd_photons = sd(treat.median, na.rm = TRUE),
range_low_photons = range(treat.median)[1],
range_high_photons = range(treat.median)[2]) %>%
ggplot(aes(y = mean_photons, x = concentration_factor)) +
geom_point(position = dodge, stat = "identity", aes( colour = line), size = 4) +
geom_path(aes( colour = line), alpha = 0.6, position = dodge, size = 4) +
geom_errorbar(aes(ymin=mean_photons-sd_photons, ymax=mean_photons+sd_photons, group = line), width=0, position = dodge, size = 1, alpha = 0.5) +
scale_x_log10(breaks = c(7,14)) +
ylab("photon count normalized to plate median") +
facet_wrap(~drug) +
xlab("concentration factor 5-fold") +
scale_colour_brewer(palette = "Set1") +
theme_minimal()
print(p)
}
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_errorbar).
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
Print shortcuts for Methotrexate treated organoids
show_shortcuts("Metho", ctg_ccp, rows = 5)
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D013T01P007L08/D013T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D015T01P007L08/D015T01P007L08_P_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D018T01P007L08/D018T01P007L08_C_18_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D020T01P007L08/D020T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D021T01P003L08/D021T01P003L08_O_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D022T01P007L08/D022T01P007L08_M_01_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D027T01P007L08/D027T01P007L08_C_18_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Warning in file(url, "rb"): cannot open file '/Users/rindtorf/promise/html/
## D030T01P007L08/D030T01P007L08_P_16_1.jpeg': No such file or directory
## Warning in FUN(X[[i]], ...): cannot open the connection
## Adding missing grouping variables: `plate_barcode_m1`
## # A tibble: 12 x 3
## # Groups: plate_barcode_m1 [12]
## plate_barcode_m1 drug line
## <chr> <chr> <chr>
## 1 D004T01P009L08 Methotrexate D004T01
## 2 D007T01P009L08 Methotrexate D007T01
## 3 D010T01P011L08 Methotrexate D010T01
## 4 D013T01P007L08 Methotrexate D013T01
## 5 D015T01P007L08 Methotrexate D015T01
## 6 D018T01P007L08 Methotrexate D018T01
## 7 D019T01P003L08 Methotrexate D019T01
## 8 D020T01P007L08 Methotrexate D020T01
## 9 D021T01P003L08 Methotrexate D021T01
## 10 D022T01P007L08 Methotrexate D022T01
## 11 D027T01P007L08 Methotrexate D027T01
## 12 D030T01P007L08 Methotrexate D030T01
Load CTG Proliferation data into R and perform first annotation of the dataset
#define datasets to load ctg data into R
assay <- "ctg_data/proliferation/proliferation_true"
#path <- '/Volumes/collab-bernd-fischer/PROMISE/'
path <- '/Volumes/Macintosh HD/Users/rindtorf/promise/local_data/'
pattern <- '_Prolif_'
filelist <- list.files(paste0(path, assay, "/"), pattern = pattern, recursive = TRUE, full.names = TRUE) #use this for importing the data
#dir <- paste0(path, assay, "/")
#define a function to load proliferation ctg files into R once they match the given definitions
load_delim <- function(full.name){
plate_name <- full.name %>%
as.tibble() %>%
separate(value, c(letters[1:13]), sep = "/") %>%
select(m) %>%
separate(m, c(letters[1:2]), sep = -5) %>%
.$a
read_delim(full.name,
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE) %>%
mutate(plate_name = plate_name)
}
#load ctg data into R
tmp_list <- lapply(filelist, load_delim)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
ctg_prolif <- do.call('rbind', tmp_list)
colnames(ctg_prolif) <- c('original_name', 'well_id_384', 'photons', "plate_barcode")
#mutate ctg data to harness annotation
ctg_prolif <- ctg_prolif %>%
separate(plate_barcode, c("tmp.plate_barcode", "tmp.mu"), sep = -7, remove = FALSE) %>%
mutate(tmp.mu = substr(tmp.mu, 2,6)) %>%
separate(tmp.mu, c("mithras", "user"), sep = "_") %>%
separate(tmp.plate_barcode, c("date", "experiment", "timepoint", "lines" ), sep= "_", extra = "merge") %>%
mutate(no_lines = str_count(lines, "D0")) %>%
separate(well_id_384, c("letter", "number"), sep = 1, remove = FALSE)
Filter the proliferation dataset further by removing empty wells
#data from seeding timepoints was generated in 96 well plates. For now it will be removed from the dataset to enable standardized analysis
tmp <- ctg_prolif %>%
filter(! timepoint %in% c("seeding"))
#on two dates (170613 and 170724) not the complete plate was filled with organoids. Here the unseeded wells are removed from the dataset.
tmp <- tmp %>%
filter(!(date %in% c("170613") & number %in% 13:24))%>%
filter(!(date %in% c("170724") & number %in% 21:24))
#the data has the following distribution
tmp %>%
ggplot(aes(photons)) +
geom_density(adjust = 0.3) +
scale_x_continuous(limits = c(0,50000)) +
theme_minimal() +
ggtitle("Distribution of Photon counts for all Proliferation Experiments")
#one single well in the dataset has a photon count of exactly 0 (second lowest value being 658). It is set to NA
tmp <- tmp %>%
mutate(photons = replace(photons, which(photons == 0), NA))
#During two seeding-dates mistakes were made. These seeding-dates are excluded later during the analysis
#filter(!(date %in% c("170606", "170515")))
#summary stats for the dataset
summary(tmp$photons)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 658 6170 10100 10802 14618 42090 1
#write tmp to load it into HTSVis
tmp %>%
filter(timepoint != "seeding") %>%
save(., file="tmp.Rdata")
#redefine ctg_prolig
ctg_prolif <- tmp
Complete the annotation of the dataset by linking organoid lines to photon counts and time
#get an overview of the lines and timepoints available. On each seeding-date there was only one set of lines processed and the plate-layout did not change between measurments.
ctg_prolif %>%
group_by(date, timepoint) %>%
summarise(n = n(),
lines = sample(lines, 1))
## # A tibble: 20 x 4
## # Groups: date [?]
## date timepoint n lines
## <chr> <chr> <int> <chr>
## 1 170515 d2 384 D004T01_D010T01_D007T01_D019T01
## 2 170515 d4 384 D004T01_D010T01_D007T01_D019T01
## 3 170529 d2 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 4 170529 d4 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 5 170529 d8 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 6 170606 d2 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 7 170606 d4 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 8 170606 d8 384 D004T01_D007T01_D010T01_D019T01_D022T01_D030T01
## 9 170613 d2 192 D020T01_D027T01_D030T01
## 10 170613 d4 192 D020T01_D027T01_D030T01
## 11 170613 d8 192 D020T01_D027T01_D030T01
## 12 170711 d2 384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 13 170711 d4 384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 14 170711 d8 384 D013T01_D015T01_D018T01_D020T01_D021T01_D027T01
## 15 170717 d2 384 D013T01_D015T01_D018T01_D021T01
## 16 170717 d4 384 D013T01_D015T01_D018T01_D021T01
## 17 170717 d8 384 D013T01_D015T01_D018T01_D021T01
## 18 170724 d2 320 D004T01_D007T01_D010T01_D019T01_D022T01
## 19 170724 d4 320 D004T01_D007T01_D010T01_D019T01_D022T01
## 20 170724 d8 320 D004T01_D007T01_D010T01_D019T01_D022T01
#define a function to annotate a dataset for a defined seeding-date
annotate_prolif <- function(df){
tmp.lines <- df %>%
select(lines) %>%
.$lines %>% #turn output into a string
unique() %>%
str_split(., "_") %>%
unlist()
tmp.anno <- tibble(number = df %>% .$number %>% unique(),
ratio = length(number)/length(tmp.lines),
anno_lines = rep(tmp.lines, each = ratio),
no_lines = df %>% .$no_lines %>% unique(),
#for control reasons the product of the columsn per line and the number of lines is calculated
anno_number = no_lines*ratio)
return(tmp.anno)
}
#redefine and annotate the ctg_prolif dataset
ctg_prolif <- ctg_prolif %>%
group_by(date) %>%
do(annotate_prolif(.)) %>%
merge(ctg_prolif, ., by = c("date", "number", "no_lines"))
ctg_prolif %>%
group_by(date, timepoint, anno_lines) %>%
summarise(n = n())
## # A tibble: 98 x 4
## # Groups: date, timepoint [?]
## date timepoint anno_lines n
## <chr> <chr> <chr> <int>
## 1 170515 d2 D004T01 96
## 2 170515 d2 D007T01 96
## 3 170515 d2 D010T01 96
## 4 170515 d2 D019T01 96
## 5 170515 d4 D004T01 96
## 6 170515 d4 D007T01 96
## 7 170515 d4 D010T01 96
## 8 170515 d4 D019T01 96
## 9 170529 d2 D004T01 64
## 10 170529 d2 D007T01 64
## # ... with 88 more rows
Gain a first overview about the proliferation dataset. Proliferation data for lines D015T01 and D030T01 appear discordant. Proliferation data for D019T01, D021T01 and D022T01 appear to follow a linear growth pattern
tmp <- ctg_prolif %>%
filter(!(number %in% c(1,24) | letter %in% c("A", "P")))
#wells loacted on the edge of the plates are removed. However, this filtration step has no impact on the median photon count per plate.
tmp %>%
group_by(date, timepoint, anno_lines) %>%
summarise(median = median(photons, na.rm = TRUE),
mad = mad(photons, na.rm = TRUE)) %>%
ggplot(aes(timepoint, median, color = date)) +
geom_point() +
geom_path(aes(group = date)) +
facet_wrap(~anno_lines) +
theme_minimal()
tmp <- tmp %>%
mutate(timepoint_num = substr(timepoint,2,2) %>% as.numeric(),
photons_log2 = log2(photons)) %>%
filter(!(date %in% c("170515", "170606"))) #seeding-dates that contained a protocol-deviation are removed from the dataset
tmp %>%
group_by(date, timepoint_num, anno_lines) %>%
summarise(median = median(photons, na.rm = TRUE),
mad = mad(photons, na.rm = TRUE)) %>%
ggplot(aes(timepoint_num, median, color = date)) +
geom_point() +
geom_path(aes(group = date)) +
facet_wrap(~anno_lines) +
theme_minimal() +
ggtitle("Proliferation curves for 12 organoid lines") +
ylab("Median photon count for each timepoint") +
xlab("Time (d)")
tmp %>%
group_by(date, timepoint_num, anno_lines) %>%
summarise(median = median(photons_log2, na.rm = TRUE),
mad = mad(photons_log2, na.rm = TRUE)) %>%
ggplot(aes(timepoint_num, median, color = date)) +
geom_point() +
geom_path(aes(group = date)) +
facet_wrap(~anno_lines) +
theme_minimal() +
scale_x_continuous(limits = c(2,4), breaks = c(2,4)) +
ggtitle("Proliferation curves for 12 organoid lines over 4 days") +
ylab("Log2 of median photon count for each timepoint") +
xlab("Time (d)")
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_path).
ctg_prolif <- tmp
needs code review!
#needs code review!
test <- ctg_prolif %>%
select(timepoint_num, date, anno_lines, photons_log2) %>%
filter(timepoint_num %in% c(2,4)) %>%
group_by(anno_lines) %>%
do(fit_growth(df = ., time = timepoint_num, data = photons_log2, model = "linear") %>% broom::tidy())
ctg_prolif %>%
select(timepoint_num, date, anno_lines, photons_log2) %>%
group_by(date, timepoint_num, anno_lines) %>%
ggplot(aes(timepoint_num, photons_log2)) +
geom_point() +
stat_growthcurve(model = "linear") +
facet_wrap(~anno_lines) +
theme_minimal() +
scale_x_continuous(limits = c(2,4), breaks = c(2,4)) +
ggtitle("Proliferation curves for 12 organoid lines over 4 days") +
ylab("Log2 of median photon count for each timepoint") +
xlab("Time (d)")
## Warning: Removed 1414 rows containing non-finite values (stat_growth_curve).
## Warning: Removed 1414 rows containing missing values (geom_point).
tmp <- test %>%
group_by(anno_lines) %>%
summarise(d3 = estimate[1]+estimate[2]*3) %>%
mutate(d3_lin = 2^d3,
line = anno_lines) %>%
select(-anno_lines) %>%
merge(ctg_ccp, ., by = "line")
head(tmp)
## line plate_barcode plate Well_ID_384 complete_barcode
## 1 D004T01 D004T01P006L08 P006 J23 170502_NR_M2_D004T01P006L08
## 2 D004T01 D004T01P006L08 P006 D01 170502_NR_M2_D004T01P006L08
## 3 D004T01 D004T01P006L08 P006 A02 170502_NR_M2_D004T01P006L08
## 4 D004T01 D004T01P006L08 P006 P06 170502_NR_M2_D004T01P006L08
## 5 D004T01 D004T01P006L08 P006 P17 170502_NR_M2_D004T01P006L08
## 6 D004T01 D004T01P006L08 P006 K22 170502_NR_M2_D004T01P006L08
## photons date user mithras library concentration_factor target count
## 1 5494 170502 NR M2 L08 1.0000 AKT1 4
## 2 10486 170502 NR M2 L08 0.0016 ABL1 11
## 3 NA 170502 NR M2 L08 0.0016 PTK2 1
## 4 12592 170502 NR M2 L08 0.0016 <NA> NA
## 5 11874 170502 NR M2 L08 1.0000 MGAM 5
## 6 4788 170502 NR M2 L08 0.2000 PLK1 3
## translation narrow_summary broad_summary drug
## 1 AKT1 PI3K/Akt PI3K/Akt AZD 5363
## 2 ABL1 ABL1 ABL1 Dasatinib
## 3 FAK FAK FAK Defactinib
## 4 DNA damage DNA damage DNA damage Trifluoridin/Tipiracil
## 5 alpha-Glucosidase antidiabetic Metabolism Miglitol
## 6 PLK1 PLK1 Cell cycle Volasertib
## control drug_conc letter number replicate dummy
## 1 0 AZD 53631 J 23 1 1
## 2 0 Dasatinib0.0016 D 1 1 1
## 3 0 Defactinib0.0016 A 2 1 1
## 4 0 Trifluoridin/Tipiracil0.0016 P 6 1 1
## 5 0 Miglitol1 P 17 1 1
## 6 0 Volasertib0.2 K 22 1 1
## ctrl.median ctrl.mad median mad treat.ctrl.median treat.median
## 1 9325 1012.616 8738 1650.134 0.5891689 0.6287480
## 2 9325 1012.616 8738 1650.134 1.1245040 1.2000458
## 3 9325 1012.616 8738 1650.134 NA NA
## 4 9325 1012.616 8738 1650.134 1.3503485 1.4410620
## 5 9325 1012.616 8738 1650.134 1.2733512 1.3588922
## 6 9325 1012.616 8738 1650.134 0.5134584 0.5479515
## d3 d3_lin
## 1 12.82019 7232.079
## 2 12.82019 7232.079
## 3 12.82019 7232.079
## 4 12.82019 7232.079
## 5 12.82019 7232.079
## 6 12.82019 7232.079
test <- ctg_ccp %>%
filter(drug == "DMSO") %>%
group_by(line, plate) %>%
summarise(median = median(photons, na.rm = TRUE))
#perhaps the estimation of doubling time is improved if positional effects are respected and every well is handled as a separate experiment
ctg_prolif %>%
filter(anno_lines == "D030T01" ) %>%
filter(letter %in% c("B", "C")) %>%
ggplot(aes(timepoint, photons, color = well_id_384, group = well_id_384)) +
geom_point() +
geom_line(aes(group = well_id_384)) +
#facet_wrap(~anno_lines) +
theme_minimal()